# Clear the workspace but keep `drop_attn_fails`: it is read below and its
# only in-file definition is commented out, so it must already exist when
# this script runs.
rm(list = base::setdiff(ls(), "drop_attn_fails"))
library(tidyverse)
library(readr)
library(psych)
library(survey)
library(cregg)

#drop_attn_fails <- TRUE

# Suffix used in the input and output file names of this script:
# "_attn" when drop_attn_fails is TRUE, "_full" otherwise.
attn <- if (drop_attn_fails) "_attn" else "_full"

## Load conjoint attributes from source files
##
## Each plumber_<country>.R script is sourced in turn. The attribute objects
## it leaves in the workspace are copied to country-suffixed names right away,
## before the next script is sourced and can reuse the generic names.

# --- UK ---
source("plumber_uk.R")
descriptive_uk <- c(genders, class, ethnicities, sexuality)
genders_uk <- genders
class_uk <- class
ethnicities_uk <- ethnicities
sexuality_uk <- sexuality
parties_uk <- parties
substantive_uk <- substantive
surrogation_uk <- surrogation
justification_uk <- justification
personalization_uk <- personalization
responsiveness_uk <- responsiveness

# --- DE ---
# Justification, personalization and surrogation are taken from the
# *_masculine objects for the German aliases.
source("plumber_de.R")
descriptive_de <- c(genders, class, ethnicities, sexuality)
genders_de <- genders
class_de <- class
ethnicities_de <- ethnicities
sexuality_de <- sexuality
parties_de <- parties
substantive_de <- substantive
surrogation_de <- surrogation_masculine
justification_de <- justification_masculine
personalization_de <- personalization_masculine
responsiveness_de <- responsiveness

# --- US ---
source("plumber_us.R")
descriptive_us <- c(genders, class, ethnicities, sexuality)
genders_us <- genders
class_us <- class
ethnicities_us <- ethnicities
sexuality_us <- sexuality
parties_us <- parties
substantive_us <- substantive
surrogation_us <- surrogation
justification_us <- justification
personalization_us <- personalization
responsiveness_us <- responsiveness


# Survey item names, one vector per item group. Each name is a fixed prefix
# followed by the item index.
sub_rep_names <- paste0("sub_rep_1_", 1:4)
surr_rep_names <- paste0("surr_rep_1_", 1:3)
desc_rep_names <- paste0("desc_rep_1_", 1:5)
just_rep_names <- paste0("just_rep1_", 1:4)
pers_rep_names <- paste0("pers_rep1_", 1:4)
resp_rep_names <- paste0("respons_rep1_", 1:3)

# All item names, in the group order above
all_names <- c(
  sub_rep_names,
  surr_rep_names,
  desc_rep_names,
  just_rep_names,
  pers_rep_names,
  resp_rep_names
)

## ###########
## Clean data

## Load survey data
## The file is chosen by the `attn` suffix; the objects it contains are
## restored into the current workspace (later sections read `uk`, `us`, `de`).

load(paste0("../working/survey_data", attn, ".Rdata"))

## ###########
## Prep UK conjoint

# Pivot from wide to long (one observation per respondent-conjoint comparison)
#
# The respondent table is duplicated, one copy per MP in a pair. Each copy
# drops the other MP's sentence columns and renames its own to the shared
# "mp_sentence" prefix so that the two copies can be stacked.

uk_mp_1 <- uk %>%
  select(-starts_with("mp_2_sentence_")) %>%
  setNames(gsub("mp_1_sentence", "mp_sentence", names(.)))

uk_mp_2 <- uk %>%
  select(-starts_with("mp_1_sentence_")) %>%
  setNames(gsub("mp_2_sentence", "mp_sentence", names(.)))

uk_mps <- rbind(uk_mp_1, uk_mp_2)

# The last character of each sentence column name becomes `set`; the rest of
# the name stays a column (mp_sentence_<k>).
uk_long <- pivot_longer(
  uk_mps,
  cols = starts_with("mp_sentence_"),
  names_to = c(".value", "set"),
  names_pattern = "(mp_sentence_.)(.)"
)

# Pivot conjoint outcomes from wide to long
# (conjoint_* columns, excluding any whose name contains "pair" or "single")

conjoint_outcome_uk <- select(
  pivot_longer(
    uk_mps,
    cols = starts_with("conjoint_") & !contains("pair") & !contains("single"),
    names_to = c(".value", "set"),
    names_pattern = "(conjoint_)(.)"
  ),
  conjoint_
)

# Add conjoint outcomes to survey data
# Rows are aligned by position: both pivots start from uk_mps.

uk_long[["mp_selected"]] <- conjoint_outcome_uk[["conjoint_"]]

# TRUE when the recorded choice equals this profile's first sentence with
# the "..." removed
uk_long[["mp_selected"]] <-
  uk_long[["mp_selected"]] == gsub("\\.\\.\\.", "", uk_long[["mp_sentence_1"]])

# Task index 1-5, repeated for every stacked respondent row
uk_long[["task"]] <- rep(seq_len(5), times = nrow(uk) * 2)

# Recode treatments to usable form
#
# Builds uk_long_clean from uk_long in four stages:
#   1. for every attribute, keep the text of whichever mp_sentence_* column
#      matches that attribute's pattern (NA in the other columns);
#   2. coalesce those per-sentence columns into one text column per attribute;
#   3. recode attribute text and respondent covariates into analysis variables;
#   4. drop rows where a respondent covariate needed for matching is missing.

uk_long_clean <- uk_long %>% 
  # Find which sentence in treatment corresponds to which attribute
  mutate(across(starts_with("mp_sentence_"),  ~case_when(grepl("for your constituency",.) ~ .),.names = "surrogation_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(justification_uk, collapse = "|"),.) ~ .), .names = "justification_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(parties_uk, collapse = "|"),.) ~ .), .names = "parties_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(personalization_uk, collapse = "|"),.) ~ .), .names = "personalization_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(substantive_uk, collapse = "|"),.) ~ .), .names = "substantive_mp_{col}"),
         # TRUE rather than T: T is an ordinary variable that can be reassigned
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(descriptive_uk, collapse = "|"),., ignore.case = TRUE) ~ .), .names = "descriptive_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(responsiveness_uk, collapse = "|"),.) ~ .), .names = "responsive_mp_{col}"))  %>%
  # Assign treatment sentence to attribute variables  
  # (first non-missing value across the per-sentence columns)
  mutate(surrogation_mp = coalesce(!!! select(., starts_with("surrogation_mp_"))),
         justification_mp = coalesce(!!! select(., starts_with("justification_mp_"))),
         party_mp = coalesce(!!! select(., starts_with("parties_mp_"))),
         personalization_mp = coalesce(!!! select(., starts_with("personalization_mp_"))),
         substantive_mp = coalesce(!!! select(., starts_with("substantive_mp_"))),
         descriptive_mp = coalesce(!!! select(., starts_with("descriptive_mp_"))),
         responsive_mp = coalesce(!!! select(., starts_with("responsive_mp_")))) %>%
  # Clean variables
  # Any party sentence that does not mention "Conservative" is coded Labour
  mutate(party_mp = ifelse(grepl("Conservative", party_mp),"Conservative","Labour")) %>% 
  # Split the descriptive sentence into its two characteristics at "and"
  separate(col = descriptive_mp, into = c("descriptive_1_mp", "descriptive_2_mp"), sep = "and", remove = FALSE)  %>%
  mutate(
    # Descriptive
    # Strip the <strong> markup, the "...", full stops, "is " and edge spaces
    # so the text can be compared with the *_match columns below
    descriptive_1_mp = gsub("\\.\\.\\.|^ | $|\\.|is ","",sub("<strong>(.*)</strong>*", "\\1", descriptive_1_mp)),
    descriptive_2_mp = gsub("\\.\\.\\.|^ | $|\\.|is ","",sub("<strong>(.*)</strong>*", "\\1", descriptive_2_mp)),
    # Respondent covariates recoded to the labels they are compared against
    gender_match = recode(Gender,
                             "Male" = "a man",
                             "Female" = "a woman",
                             "Other" = "other",
                             "Prefer not to say" = "other"),
    ethnicity_match = recode(Ethnicity, 
                                "White British" = "white",
                                "Black African" = "black",
                                "Black Caribbean" = "black",
                                "White and Black African" = "black",
                                "White and Black Caribbean" = "black",
                                "Pakistani" = "Asian",
                                "Indian" = "Asian",
                                "Chinese" = "Asian",
                                "Bangladeshi" = "Asian",
                                "Any other Asian background" = "Asian",
                                "White and Asian" = "Asian",
                                "Any other white background" = "white",
                                "Any other mixed background" = "Other",
                                "Other ethnic group" = "Other",
                                "Any other black background" = "black",
                                .default = NA_character_),
    
    sexuality_match = recode(Sexuality, 
                                "Heterosexual" = "heterosexual",
                                "Homosexual" = "homosexual",
                                "Bisexual" = "homosexual",
                                "Other" = "Other",
                                .default = NA_character_),
    class_match = tolower(Class),
    # 1 when the MP characteristic equals any of the respondent's own
    # characteristics, 0 otherwise (including when the comparison is NA)
    descriptive_1_match = case_when(descriptive_1_mp == class_match ~ 1, 
                                       descriptive_1_mp == sexuality_match ~ 1, 
                                       descriptive_1_mp == ethnicity_match ~ 1, 
                                       descriptive_1_mp == gender_match ~ 1,
                                       TRUE ~ 0),
    descriptive_2_match = case_when(descriptive_2_mp == class_match ~ 1, 
                                       descriptive_2_mp == sexuality_match ~ 1, 
                                       descriptive_2_mp == ethnicity_match ~ 1, 
                                       descriptive_2_mp == gender_match ~ 1,
                                       TRUE ~ 0),
    # Number of shared characteristics, then labelled as a factor
    Descriptive = descriptive_1_match + descriptive_2_match,
    Descriptive = factor(Descriptive, labels = paste0(sort(unique(Descriptive)), " shared characteristic(s)")),
    #Substantive
    # MP position and issue are read from the sentence; Substantive is TRUE
    # when the respondent's own answer on that issue equals the MP's position
    substantive_position_mp = ifelse(grepl("supports", substantive_mp),"Support","Oppose"),
    substantive_issue_mp= gsub("...<strong>opposes</strong> |...<strong>supports</strong> ","",substantive_mp),
    Substantive = case_when(substantive_issue_mp == "building HS2." ~ hs2 == substantive_position_mp,
                            substantive_issue_mp == "increasing the Universal Credit grant." ~ u_credit == substantive_position_mp,
                            substantive_issue_mp == "constructing thousands of new onshore wind turbines." ~ wind_turbines == substantive_position_mp,
                            substantive_issue_mp == "reintroducing the death penalty for the crime of murder." ~ death_penalty == substantive_position_mp,
                            substantive_issue_mp == "increasing the rate of tax on income over £50,000 to 45%." ~ inc_tax == substantive_position_mp,
                            substantive_issue_mp == "introducing more stringent measures to reduce immigration." ~ immigration == substantive_position_mp),
    Substantive = factor(ifelse(Substantive, "    Congruent", "    Incongruent"), levels = c("    Incongruent", "    Congruent")),
    # salience_* item belonging to the issue shown in the profile
    Substantive_Importance = case_when(substantive_issue_mp == "building HS2." ~ salience_2,
                            substantive_issue_mp == "increasing the Universal Credit grant." ~ salience_3,
                            substantive_issue_mp == "constructing thousands of new onshore wind turbines." ~ salience_1,
                            substantive_issue_mp == "reintroducing the death penalty for the crime of murder." ~ salience_6,
                            substantive_issue_mp == "increasing the rate of tax on income over £50,000 to 45%." ~ salience_4,
                            substantive_issue_mp == "introducing more stringent measures to reduce immigration." ~ salience_5),
    # Surrogation
    Surrogation = factor(ifelse(grepl("not for your constituency", surrogation_mp), "MP for another constituency", "MP for respondent's constituency"),
                         levels = c("MP for respondent's constituency", "MP for another constituency")),
    PartisanSurrogation = factor(ifelse(gen_election_retro == party_mp,  "MP from respondent's party", "MP from another party"),
                                 levels  =c("MP from respondent's party", "MP from another party")),
    # Justification
    Justification = as.factor(ifelse(grepl("society as a whole|the national interest|the whole of the UK",justification_mp),
                                     "Society as a whole", 
                                     "People like you")),
    # Personalization
    # Elements 1, 4, 6 of personalization_uk are coded high and 2, 3, 5 low
    Personalization = factor(
      case_when(grepl(paste0(personalization_uk[c(1,4,6)], collapse = "|"), personalization_mp) ~ "High personalization",
                grepl(paste0(personalization_uk[c(2,3,5)], collapse = "|"), personalization_mp) ~ "Low personalization"),
      levels = c("Low personalization", "High personalization")),
    # Responsiveness
    # Elements 1-4 of responsiveness_uk are coded high and 5-8 low
    Responsiveness = factor(
      case_when(grepl(paste0(responsiveness_uk[c(1,2,3,4)], collapse = "|"), responsive_mp) ~ "High responsiveness",
                grepl(paste0(responsiveness_uk[c(5,6,7,8)], collapse = "|"), responsive_mp) ~ "Low responsiveness"),
      levels = c("Low responsiveness", "High responsiveness")),
    # Recode FA scores to three groups (Lo / Mid / Hi) with cut_number();
    # the cut points are computed on the long data, before the filters below
    Descriptive_FA_Group = factor(cut_number(desc_fa, 3), labels = c("Lo", "Mid", "Hi")),
    Substantive_FA_Group = factor(cut_number(sub_fa, 3), labels = c("Lo", "Mid", "Hi")),
    Surrogation_FA_Group = factor(cut_number(surr_fa, 3), labels = c("Lo", "Mid", "Hi")),
    Personalization_FA_Group = factor(cut_number(pers_fa, 3), labels = c("Lo", "Mid", "Hi")),
    Responsiveness_FA_Group = factor(cut_number(resp_fa, 3), labels = c("Lo", "Mid", "Hi")),
    Justification_FA_Group = factor(cut_number(just_fa, 3), labels = c("Lo", "Mid", "Hi"))
    ) %>%
  # Drop profiles whose second characteristic cannot be matched because the
  # respondent's own value is missing. Only descriptive_2_mp is checked.
  # NOTE(review): presumably descriptive_1_mp is always gender - confirm.
  filter(!(is.na(class_match) & grepl("class", descriptive_2_mp))) %>% # Missing class 
  filter(!(is.na(sexuality_match) & grepl("heterosexual|homosexual", descriptive_2_mp))) %>% # Missing sexuality 
  filter(!(is.na(ethnicity_match) & grepl("Asian|black|white", descriptive_2_mp))) %>% # Missing ethnicity 
  filter(!is.na(gen_election_retro)) # Missing party affiliation

## ###########
## Prep US conjoint

# Pivot from wide to long (one observation per respondent-conjoint comparison)
#
# The respondent table is duplicated, one copy per representative in a pair.
# Each copy drops the other profile's sentence columns and renames its own to
# the shared "mp_sentence" prefix so that the two copies can be stacked.

us_mp_1 <- us %>%
  select(-starts_with("mp_2_sentence_")) %>%
  setNames(gsub("mp_1_sentence", "mp_sentence", names(.)))

us_mp_2 <- us %>%
  select(-starts_with("mp_1_sentence_")) %>%
  setNames(gsub("mp_2_sentence", "mp_sentence", names(.)))

us_mps <- rbind(us_mp_1, us_mp_2)

# The last character of each sentence column name becomes `set`; the rest of
# the name stays a column (mp_sentence_<k>).
us_long <- pivot_longer(
  us_mps,
  cols = starts_with("mp_sentence_"),
  names_to = c(".value", "set"),
  names_pattern = "(mp_sentence_.)(.)"
)

# Pivot conjoint outcomes from wide to long
# (conjoint_* columns, excluding any whose name contains "pair" or "single")

conjoint_outcome_us <- select(
  pivot_longer(
    us_mps,
    cols = starts_with("conjoint_") & !contains("pair") & !contains("single"),
    names_to = c(".value", "set"),
    names_pattern = "(conjoint_)(.)"
  ),
  conjoint_
)

# Add conjoint outcomes to survey data
# Rows are aligned by position: both pivots start from us_mps.

us_long[["mp_selected"]] <- conjoint_outcome_us[["conjoint_"]]

# TRUE when the recorded choice equals this profile's first sentence with
# the "..." removed
us_long[["mp_selected"]] <-
  us_long[["mp_selected"]] == gsub("\\.\\.\\.", "", us_long[["mp_sentence_1"]])

# Task index 1-5, repeated for every stacked respondent row
us_long[["task"]] <- rep(seq_len(5), times = nrow(us) * 2)

# Recode treatments to usable form
#
# Builds us_long_clean from us_long in four stages:
#   1. for every attribute, keep the text of whichever mp_sentence_* column
#      matches that attribute's pattern (NA in the other columns);
#   2. coalesce those per-sentence columns into one text column per attribute;
#   3. recode attribute text and respondent covariates into analysis variables;
#   4. drop rows where a respondent covariate needed for matching is missing.

us_long_clean <- us_long %>% 
  # Find which sentence in treatment corresponds to which attribute
  # (for the substantive pattern, " $510,000 to 40%" is removed from the
  # combined pattern before it is used as a regular expression)
  mutate(across(starts_with("mp_sentence_"),  ~case_when(grepl("for your congressional district",.) ~ .),.names = "surrogation_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(justification_us, collapse = "|"),.) ~ .), .names = "justification_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(parties_us, collapse = "|"),.) ~ .), .names = "parties_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(personalization_us, collapse = "|"),.) ~ .), .names = "personalization_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(gsub(" \\$510,000 to 40\\%","", paste0(substantive_us, collapse = "|")),.) ~ .), .names = "substantive_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(descriptive_us, collapse = "|"),.) ~ .), .names = "descriptive_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(responsiveness_us, collapse = "|"),.) ~ .), .names = "responsive_mp_{col}"))  %>%
  # Assign treatment sentence to attribute variables  
  # (first non-missing value across the per-sentence columns)
  mutate(surrogation_mp = coalesce(!!! select(., starts_with("surrogation_mp_"))),
         justification_mp = coalesce(!!! select(., starts_with("justification_mp_"))),
         party_mp = coalesce(!!! select(., starts_with("parties_mp_"))),
         personalization_mp = coalesce(!!! select(., starts_with("personalization_mp_"))),
         substantive_mp = coalesce(!!! select(., starts_with("substantive_mp_"))),
         descriptive_mp = coalesce(!!! select(., starts_with("descriptive_mp_"))),
         responsive_mp = coalesce(!!! select(., starts_with("responsive_mp_")))) %>%
  # Clean variables
  # Any party sentence that does not mention "Republican" is coded Democrat
  mutate(party_mp = ifelse(grepl("Republican", party_mp),"Republican","Democrat")) %>% 
  # Split the descriptive sentence into its two characteristics at "and"
  separate(col = descriptive_mp, into = c("descriptive_1_mp", "descriptive_2_mp"), sep = "and", remove = FALSE)  %>%
  mutate(
    # Descriptive
    # Strip the <strong> markup, the "...", full stops, "is " and edge spaces
    # so the text can be compared with the *_match columns below
    descriptive_1_mp = gsub("\\.\\.\\.|^ | $|\\.|is ","",sub("<strong>(.*)</strong>*", "\\1", descriptive_1_mp)),
    descriptive_2_mp = gsub("\\.\\.\\.|^ | $|\\.|is ","",sub("<strong>(.*)</strong>*", "\\1", descriptive_2_mp)),
    # Respondent covariates recoded to the labels they are compared against;
    # values not listed become NA where .default = NA_character_ is given
    gender_match = recode(Gender,
                          "Male" = "a man",
                          "Female" = "a woman",
                          "Other" = "other",
                          .default = NA_character_),
    ethnicity_match = recode(Ethnicity, 
                             "Asian" = "Asian",
                             "Black" = "Black",
                             "Hispanic" = "Hispanic",
                             "Middle Eastern" = "Other",
                             "Mixed" = "Other",
                             "Native American" = "Other",
                             "Other" = "Other",
                             "White" = "White",
                             .default = NA_character_),
    # A missing Hispanic answer is treated as "Yes", and Hispanic == "Yes"
    # overrides the ethnicity recode above.
    # NOTE(review): presumably the item was skipped for respondents already
    # recorded as Hispanic - confirm against the questionnaire.
    Hispanic = ifelse(is.na(Hispanic),"Yes",Hispanic),
    ethnicity_match = ifelse(Hispanic == "Yes", "Hispanic", ethnicity_match),
    sexuality_match = tolower(Sexuality),
    class_match = tolower(Class),
    # Missing House vote is recoded to "Don't know", so party_match is never
    # NA and PartisanSurrogation below is always defined
    house_election_retro = ifelse(is.na(house_election_retro), "Don't know", house_election_retro),
    party_match = recode(house_election_retro,
                         "Democratic candidate" = "Democrat",
                         "Republican candidate" = "Republican",
                         "Independent candidate" = "Independent candidate",
                         "Other candidate" = "Other candidate",
                         "Did not vote" = "Did not vote",
                         "Was not eligible to vote" = "Was not eligible to vote"
                         ),
    # 1 when the profile characteristic equals any of the respondent's own
    # characteristics, 0 otherwise (including when the comparison is NA)
    descriptive_1_match = case_when(descriptive_1_mp == class_match ~ 1, 
                                    descriptive_1_mp == sexuality_match ~ 1, 
                                    descriptive_1_mp == ethnicity_match ~ 1, 
                                    descriptive_1_mp == gender_match ~ 1,
                                    TRUE ~ 0),
    descriptive_2_match = case_when(descriptive_2_mp == class_match ~ 1, 
                                    descriptive_2_mp == sexuality_match ~ 1, 
                                    descriptive_2_mp == ethnicity_match ~ 1, 
                                    descriptive_2_mp == gender_match ~ 1,
                                    TRUE ~ 0),
    # Number of shared characteristics, then labelled as a factor
    Descriptive = descriptive_1_match + descriptive_2_match,
    Descriptive = factor(Descriptive, labels = paste0(sort(unique(Descriptive)), " shared characteristic(s)")),
    #Substantive
    # Position and issue are read from the sentence; Substantive is TRUE when
    # the respondent's own answer on that issue equals the profile's position
    substantive_position_mp = ifelse(grepl("supports", substantive_mp),"Support","Oppose"),
    substantive_issue_mp= gsub("...<strong>opposes</strong> |...<strong>supports</strong> ","",substantive_mp),
    Substantive = case_when(substantive_issue_mp == "abolishing the death penalty for persons convicted of murder." ~ death_penalty == substantive_position_mp,
                            substantive_issue_mp == "introducing more stringent measures to reduce immigration." ~ immigration == substantive_position_mp,
                            substantive_issue_mp == "increasing the federal tax rate on income over $510,000 to 40%." ~ inc_tax == substantive_position_mp,
                            substantive_issue_mp == "increasing federal spending on welfare programs." ~ welfare == substantive_position_mp,
                            substantive_issue_mp == "introducing a federal carbon tax." ~ carbon_tax == substantive_position_mp,
                            substantive_issue_mp == "creating a federal school voucher program." ~ wind_turbines == substantive_position_mp), # wind_turbines is correct here, we forgot to rename in qualtrics
    Substantive = factor(ifelse(Substantive, "    Congruent", "    Incongruent"), levels = c("    Incongruent", "    Congruent")),
    # salience_* item belonging to the issue shown in the profile
    Substantive_Importance = case_when(substantive_issue_mp == "abolishing the death penalty for persons convicted of murder." ~ salience_6,
                                       substantive_issue_mp == "introducing more stringent measures to reduce immigration." ~ salience_5,
                                       substantive_issue_mp == "increasing the federal tax rate on income over $510,000 to 40%." ~ salience_4,
                                       substantive_issue_mp == "increasing federal spending on welfare programs." ~ salience_3,
                                       substantive_issue_mp == "introducing a federal carbon tax." ~ salience_2,
                                       substantive_issue_mp == "creating a federal school voucher program." ~ salience_1),
    # Surrogation
    Surrogation = factor(ifelse(grepl("not for your congressional district", surrogation_mp), "Representative for another district", "Representative for respondent's district"),
                         levels = c("Representative for respondent's district", "Representative for another district")),
    PartisanSurrogation = factor(ifelse(party_match == party_mp,  "Representative from respondent's party", "Representative from another party"),
                                 levels  =c("Representative from respondent's party", "Representative from another party")),
    # Justification
    Justification = as.factor(ifelse(grepl("society as a whole|the national interest|the whole of the US",justification_mp),
                                     "Society as a whole", 
                                     "People like you")),
    # Personalization
    # Elements 1, 4, 6 of personalization_us are coded high and 2, 3, 5 low
    Personalization = factor(
      case_when(grepl(paste0(personalization_us[c(1,4,6)], collapse = "|"), personalization_mp) ~ "High personalization",
                grepl(paste0(personalization_us[c(2,3,5)], collapse = "|"), personalization_mp) ~ "Low personalization"),
      levels = c("Low personalization", "High personalization")),
    # Responsiveness
    # Elements 1-4 of responsiveness_us are coded high and 5-8 low
    Responsiveness = factor(
      case_when(grepl(paste0(responsiveness_us[c(1,2,3,4)], collapse = "|"), responsive_mp) ~ "High responsiveness",
                grepl(paste0(responsiveness_us[c(5,6,7,8)], collapse = "|"), responsive_mp) ~ "Low responsiveness"),
      levels = c("Low responsiveness", "High responsiveness")),
    # Recode FA scores to three groups (Lo / Mid / Hi) with cut_number();
    # the cut points are computed on the long data, before the filters below
    Descriptive_FA_Group = factor(cut_number(desc_fa,3), labels = c("Lo", "Mid", "Hi")),
    Substantive_FA_Group = factor(cut_number(sub_fa,3), labels = c("Lo", "Mid", "Hi")),
    Surrogation_FA_Group = factor(cut_number(surr_fa,3), labels = c("Lo", "Mid", "Hi")),
    Personalization_FA_Group = factor(cut_number(pers_fa,3), labels = c("Lo", "Mid", "Hi")),
    Responsiveness_FA_Group = factor(cut_number(resp_fa,3), labels = c("Lo", "Mid", "Hi")),
    Justification_FA_Group = factor(cut_number(just_fa,3), labels = c("Lo", "Mid", "Hi"))
  ) %>%
  # Drop profiles whose second characteristic cannot be matched because the
  # respondent's own value is missing. Only descriptive_2_mp is checked.
  # NOTE(review): presumably descriptive_1_mp is always gender - confirm.
  filter(!(sexuality_match == "prefer not to say" & grepl("heterosexual|homosexual", descriptive_2_mp))) %>% # Missing sexuality 
  filter(!(is.na(class_match) & grepl("class", descriptive_2_mp))) %>% # Missing class 
  # NOTE(review): party matching above uses house_election_retro, but this
  # filter tests gen_election_retro - confirm that is the intended column.
  filter(!is.na(gen_election_retro)) # Missing party

## ###########
## Prep DE conjoint

# Pivot from wide to long (one observation per respondent-conjoint comparison)
#
# The respondent table is duplicated, one copy per MP in a pair. Each copy
# drops the other MP's sentence columns and renames its own to the shared
# "mp_sentence" prefix so that the two copies can be stacked.

de_mp_1 <- de %>%
  select(-starts_with("mp_2_sentence_")) %>%
  setNames(gsub("mp_1_sentence", "mp_sentence", names(.)))

de_mp_2 <- de %>%
  select(-starts_with("mp_1_sentence_")) %>%
  setNames(gsub("mp_2_sentence", "mp_sentence", names(.)))

de_mps <- rbind(de_mp_1, de_mp_2)

# The last character of each sentence column name becomes `set`; the rest of
# the name stays a column (mp_sentence_<k>).
de_long <- pivot_longer(
  de_mps,
  cols = starts_with("mp_sentence_"),
  names_to = c(".value", "set"),
  names_pattern = "(mp_sentence_.)(.)"
)

# Pivot conjoint outcomes from wide to long
# (conjoint_* columns, excluding any whose name contains "pair" or "single")

conjoint_outcome_de <- select(
  pivot_longer(
    de_mps,
    cols = starts_with("conjoint_") & !contains("pair") & !contains("single"),
    names_to = c(".value", "set"),
    names_pattern = "(conjoint_)(.)"
  ),
  conjoint_
)

# Add conjoint outcomes to survey data
# Rows are aligned by position: both pivots start from de_mps.

de_long[["mp_selected"]] <- conjoint_outcome_de[["conjoint_"]]

# TRUE when the recorded choice equals this profile's first sentence with
# the "..." removed
de_long[["mp_selected"]] <-
  de_long[["mp_selected"]] == gsub("\\.\\.\\.", "", de_long[["mp_sentence_1"]])

# Task index 1-5, repeated for every stacked respondent row
de_long[["task"]] <- rep(seq_len(5), times = nrow(de) * 2)

# Recode treatments to usable form
#
# Builds de_long_clean from de_long in four stages:
#   1. for every attribute, keep the text of whichever mp_sentence_* column
#      matches that attribute's pattern (NA in the other columns);
#   2. coalesce those per-sentence columns into one text column per attribute;
#   3. recode attribute text and respondent covariates into analysis variables;
#   4. drop rows where a respondent covariate needed for matching is missing.

de_long_clean <- de_long %>% 
  # Find which sentence in treatment corresponds to which attribute
  # Justification and personalization are matched against both the masculine
  # and the feminine wordings; these objects are read directly from the
  # workspace (presumably left there by plumber_de.R - confirm).
  # descriptive_de[-6] leaves the sixth element out of the pattern.
  mutate(across(starts_with("mp_sentence_"),  ~case_when(grepl("für Ihren Wahlkreis|in Ihrem Bundesland|für Ihr Bundesland|für ein anderes Bundesland|einem anderen Bundesland",.) ~ .),.names = "surrogation_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(c(justification_masculine,justification_feminine), collapse = "|"),.) ~ .), .names = "justification_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(parties_de, collapse = "|"),.) ~ .), .names = "parties_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(c(personalization_feminine,personalization_masculine), collapse = "|"),.) ~ .), .names = "personalization_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(substantive_de, collapse = "|"),.) ~ .), .names = "substantive_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(descriptive_de[-6], collapse = "|"),.) ~ .), .names = "descriptive_mp_{col}"),
         across(starts_with("mp_sentence_"),  ~case_when(grepl(paste0(responsiveness_de, collapse = "|"),.) ~ .), .names = "responsive_mp_{col}")
         ) %>%
  # Assign treatment sentence to attribute variables
  # (first non-missing value across the per-sentence columns)
  mutate(surrogation_mp = coalesce(!!! select(., starts_with("surrogation_mp_"))),
         justification_mp = coalesce(!!! select(., starts_with("justification_mp_"))),
         party_mp = coalesce(!!! select(., starts_with("parties_mp_"))),
         personalization_mp = coalesce(!!! select(., starts_with("personalization_mp_"))),
         substantive_mp = coalesce(!!! select(., starts_with("substantive_mp_"))),
         descriptive_mp = coalesce(!!! select(., starts_with("descriptive_mp_"))),
         responsive_mp = coalesce(!!! select(., starts_with("responsive_mp_")))) %>%
  # Clean variables
  # party_mp is NA when none of the three parties is mentioned.
  # A gender-only descriptive sentence gets " und Nein Migrationshintergrund"
  # appended, so its second characteristic is "no migration background".
  mutate(party_mp = case_when(grepl("SPD",party_mp) ~ "SPD",
                              grepl("CDU/CSU",party_mp) ~ "CDU/CSU",
                              grepl("Bündnis 90/Grüne",party_mp) ~ "Bündnis 90/Grüne"),
         descriptive_mp = ifelse(descriptive_mp %in% c("...ist ein <strong>Mann</strong>.", "...ist eine <strong>Frau</strong>."), paste0(descriptive_mp," und Nein Migrationshintergrund"), descriptive_mp)) %>% 
  # Split the descriptive sentence into its two characteristics at " und "
  separate(col = descriptive_mp, into = c("descriptive_1_mp", "descriptive_2_mp"), sep = " und ", remove = FALSE)  %>%
  mutate(
    # Descriptive
    # Strip the <strong> markup, the "...", full stops and edge spaces so the
    # text can be compared with the *_match columns below
    descriptive_1_mp = gsub("\\.\\.\\.|^ | $|\\.|is ","",sub("<strong>(.*)</strong>*", "\\1", descriptive_1_mp)),
    descriptive_2_mp = gsub("\\.\\.\\.|^ | $|\\.|is ","",sub("<strong>(.*)</strong>*", "\\1", descriptive_2_mp)),
    # Respondent covariates recoded to the wording they are compared against;
    # recode() has no .default here, so unlisted values are kept unchanged
    gender_match = recode(Gender,
                          "Männlich" = "ist ein Mann",
                          "Weiblich" = "ist eine Frau"),
    ethnicity_match = recode(Migr_back, 
                             "Ja, polnischer Migrationshintergrund" = "hat einen polnischen Migrationshintergrund",
                             "Ja, russischer Migrationshintergrund" = "hat einen russischen Migrationshintergrund",
                             "Ja, türkischer Migrationshintergrund" = "hat einen türkischen Migrationshintergrund",
                             "Nein" = "Nein Migrationshintergrund",
                             "Ja, anderer Migrationshintergrund" = "Ja, anderer Migrationshintergrund",
                             "Keine Angabe" = "Keine Angabe"),
    sexuality_match = recode(Sexuality,
                             "Heterosexuell" = "ist heterosexuell",
                             "Homosexuell" = "ist homosexuell"),
    class_match = recode(Class,
                         "Arbeiterschicht" = "kommt aus der Arbeiterschicht",
                         "Mittelschicht" = "kommt aus der Mittelschicht",
                         "Oberschicht" = "kommt aus der Oberschicht"),
    party_match = recode(gen_election_retro,
                         "Bündnis 90/Die Grünen" = "Bündnis 90/Grüne"),
    # 1 when the MP characteristic equals any of the respondent's own
    # characteristics, 0 otherwise (including when the comparison is NA)
    descriptive_1_match = case_when(descriptive_1_mp == class_match ~ 1, 
                                    descriptive_1_mp == sexuality_match ~ 1, 
                                    descriptive_1_mp == ethnicity_match ~ 1, 
                                    descriptive_1_mp == gender_match ~ 1,
                                    TRUE ~ 0),
    descriptive_2_match = case_when(descriptive_2_mp == class_match ~ 1, 
                                    descriptive_2_mp == sexuality_match ~ 1, 
                                    descriptive_2_mp == ethnicity_match ~ 1, 
                                    descriptive_2_mp == gender_match ~ 1,
                                    TRUE ~ 0),
    # Number of shared characteristics, then labelled as a factor
    Descriptive = descriptive_1_match + descriptive_2_match,
    Descriptive = factor(Descriptive, labels = paste0(sort(unique(Descriptive)), " shared characteristic(s)")),
    #Substantive
    # MP position and issue are read from the sentence; Substantive is TRUE
    # when the respondent's own answer on that issue equals the MP's position
    substantive_position_mp = ifelse(grepl("befürwortet", substantive_mp),"Befürworten","Ablehnen"),
    substantive_issue_mp= gsub("...<strong>lehnt</strong> |...<strong>befürwortet</strong> | <strong>ab</strong>","",substantive_mp),
    Substantive = case_when(substantive_issue_mp == "den Bau von Nord Stream 2." ~ ns2 == substantive_position_mp,
                            substantive_issue_mp == "die Abschaffung des Werbeverbots für Schwangerschaftsabbrüche." ~ abortion == substantive_position_mp,
                            substantive_issue_mp == "die Einführung strengerer Maßnahmen zur Reduzierung der Einwanderung." ~ immigration == substantive_position_mp,
                            substantive_issue_mp == "die Erhöhung der Luftfahrtssteuer." ~ co2tax == substantive_position_mp,
                            substantive_issue_mp == "die Erhöhung des Steuersatzes auf Einkommen über 55.961 Euro auf 45%." ~ inc_tax == substantive_position_mp,
                            substantive_issue_mp == "die Erhöhung von Hartz IV." ~ hartz == substantive_position_mp), 
    Substantive = factor(ifelse(Substantive, "    Congruent", "    Incongruent"), levels = c("    Incongruent", "    Congruent")),
    # salience_* item belonging to the issue shown in the profile
    Substantive_Importance = case_when(substantive_issue_mp == "den Bau von Nord Stream 2." ~ salience_5,
                                       substantive_issue_mp == "die Abschaffung des Werbeverbots für Schwangerschaftsabbrüche." ~ salience_1,
                                       substantive_issue_mp == "die Einführung strengerer Maßnahmen zur Reduzierung der Einwanderung." ~ salience_2,
                                       substantive_issue_mp == "die Erhöhung der Luftfahrtssteuer." ~ salience_6,
                                       substantive_issue_mp == "die Erhöhung des Steuersatzes auf Einkommen über 55.961 Euro auf 45%." ~ salience_3,
                                       substantive_issue_mp == "die Erhöhung von Hartz IV." ~ salience_4),
    # Surrogation
    # Five levels; the first matching pattern wins, and sentences matching
    # none of them become NA
    Surrogation = factor(case_when(grepl("für Ihren Wahlkreis", surrogation_mp) ~ "MP for respondent's constituency",
                            grepl("für Ihr Bundesland", surrogation_mp) ~ "MP for respondent's state",
                            grepl("für einen anderen Wahlkreis in Ihrem Bundesland", surrogation_mp) ~ "MP for another constituency in respondent's state",
                            grepl("für einen anderen Wahlkreis in einem anderen Bundesland", surrogation_mp) ~ "MP for another constituency in another state",
                            grepl("für ein anderes Bundesland", surrogation_mp) ~ "MP for another state"),
                         levels = c("MP for respondent's constituency", 
                                    "MP for respondent's state", 
                                    "MP for another constituency in respondent's state",
                                    "MP for another constituency in another state",
                                    "MP for another state")),
    PartisanSurrogation = factor(ifelse(party_match == party_mp,  "Representative from respondent's party", "Representative from another party"),
                                 levels  =c("Representative from respondent's party", "Representative from another party")),
    # Justification
    Justification = as.factor(ifelse(grepl("die gesamte Gesellschaft|den nationalen Interessen|ganz Deutschland",justification_mp),
                                     "Society as a whole", 
                                     "People like you")),
    # Personalization
    # Elements 1, 4, 6 of both wordings are coded high and 2, 3, 5 low
    Personalization = factor(
      case_when(grepl(paste0(c(personalization_feminine[c(1,4,6)], personalization_masculine[c(1,4,6)]), collapse = "|"), personalization_mp) ~ "High personalization",
                grepl(paste0(c(personalization_feminine[c(2,3,5)], personalization_masculine[c(2,3,5)]), collapse = "|"), personalization_mp) ~ "Low personalization"),
      levels = c("Low personalization", "High personalization")),
    # Responsiveness
    # Elements 1-4 of responsiveness_de are coded high and 5-8 low
    Responsiveness = factor(
      case_when(grepl(paste0(responsiveness_de[c(1,2,3,4)], collapse = "|"), responsive_mp) ~ "High responsiveness",
                grepl(paste0(responsiveness_de[c(5,6,7,8)], collapse = "|"), responsive_mp) ~ "Low responsiveness"),
      levels = c("Low responsiveness", "High responsiveness")),
    # Recode FA scores to three groups (Lo / Mid / Hi) with cut_number();
    # the cut points are computed on the long data, before the filters below
    Descriptive_FA_Group = factor(cut_number(desc_fa,3), labels = c("Lo", "Mid", "Hi")),
    Substantive_FA_Group = factor(cut_number(sub_fa,3), labels = c("Lo", "Mid", "Hi")),
    Surrogation_FA_Group = factor(cut_number(surr_fa,3), labels = c("Lo", "Mid", "Hi")),
    Personalization_FA_Group = factor(cut_number(pers_fa,3), labels = c("Lo", "Mid", "Hi")),
    Responsiveness_FA_Group = factor(cut_number(resp_fa,3), labels = c("Lo", "Mid", "Hi")),
    Justification_FA_Group = factor(cut_number(just_fa,3), labels = c("Lo", "Mid", "Hi"))
  ) %>%
  # Drop profiles whose second characteristic cannot be matched because the
  # respondent's own value is missing. Only descriptive_2_mp is checked.
  filter(!(ethnicity_match == "Keine Angabe" & descriptive_2_mp %in% c("hat einen polnischen Migrationshintergrund", "hat einen russischen Migrationshintergrund", "hat einen türkischen Migrationshintergrund", "Nein Migrationshintergrund"))) %>% # Missing migrant background
  # Fixed: this filter previously tested ethnicity_match, so it dropped
  # respondents with unreported migration background instead of those with
  # unreported sexuality. Rows where sexuality_match is NA and a sexuality
  # attribute is shown evaluate to NA and are dropped by filter() as well.
  # NOTE(review): assumes the non-response label of Sexuality is
  # "Keine Angabe", as it is for Migr_back - confirm against the survey data.
  filter(!(sexuality_match == "Keine Angabe" & descriptive_2_mp %in% c("ist heterosexuell", "ist homosexuell"))) %>% # Missing sexuality
  filter(!(is.na(class_match) & descriptive_2_mp %in% c("kommt aus der Arbeiterschicht", "kommt aus der Mittelschicht", "kommt aus der Oberschicht"))) %>% # Missing class
  filter(!is.na(gen_election_retro)) # Missing party

# Store the three cleaned conjoint data sets together in one .Rdata file
# whose name carries the `attn` suffix.
save(
  list = c("uk_long_clean", "us_long_clean", "de_long_clean"),
  file = paste0("../working/conjoint_data", attn, ".Rdata")
)

# Write each country's cleaned row count to a one-line text file.
# The UK and US file names contain a double underscore ("conjoint__sample")
# while the DE name has a single one; the names are kept exactly as they are.
writeLines(
  as.character(nrow(uk_long_clean)),
  paste0("../output/useful_numbers/uk", attn, "_conjoint__sample.txt")
)
writeLines(
  as.character(nrow(de_long_clean)),
  paste0("../output/useful_numbers/de", attn, "_conjoint_sample.txt")
)
writeLines(
  as.character(nrow(us_long_clean)),
  paste0("../output/useful_numbers/us", attn, "_conjoint__sample.txt")
)

